// GLSL-style vector type names mapped onto the OpenCL C vector types, so the
// shader-like code in this file can be written with vecN spellings.
#define vec2 float2
#define vec3 float3
#define vec4 float4
// Colour-named swizzle spellings mapped onto the positional ones
// (.rgb -> .xyz, .rgba -> .xyzw). No use of them is visible in this section.
#define rgb xyz
#define rgba xyzw

// Both samplers take normalized coordinates and clamp to the image edge;
// they differ only in filtering.
// `sampler`  - linear filtering; used by INPUT1 for the source image.
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
// `sampler1` - nearest-neighbour filtering; used by INPUT2 for the overlay image.
const sampler_t sampler1 = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

/*
 * Sample the source image at texture coordinate `tc`.
 *
 * `tc` is first remapped as tc * origROI[2..3] + origROI[0..1], i.e. scaled by
 * the third/fourth ROI entries and offset by the first/second
 * (presumably x, y, width, height in normalized units -- confirm against the
 * FilterParam producer). The image is read with the linear-filtering sampler.
 *
 * Returns the texel with its first and third channels swapped (.zyxw).
 */
vec4 INPUT1(image2d_t src_data,  __global FilterParam* param, vec2 tc)
{
	const vec2 roiOffset = (vec2)(param->origROI[0], param->origROI[1]);
	const vec2 roiScale = (vec2)(param->origROI[2], param->origROI[3]);
	const vec2 samplePos = tc * roiScale + roiOffset;

	const vec4 texel = read_imagef(src_data, sampler, samplePos);
	return texel.zyxw;
}

/*
 * Sample the overlay image at texture coordinate `tc`.
 *
 * Unlike INPUT1, no ROI remapping is applied to `tc`, so `param` is not read
 * here. The image is read with the nearest-neighbour sampler.
 *
 * Returns the texel with its first and third channels swapped (.zyxw).
 */
vec4 INPUT2(image2d_t ovelay1,  __global FilterParam* param, vec2 tc)
{
	const vec4 texel = read_imagef(ovelay1, sampler1, tc);
	return texel.zyxw;
}

/*
 * Fractional part of `f`, computed as f - floor(f).
 * Because floor rounds toward negative infinity, the result for a negative
 * input is measured from the next lower integer (e.g. -0.25 -> 0.75).
 */
float _fract(float f )
{
	const float wholePart = floor(f);
	return f - wholePart;
}



/*
 * MAIN - vignette + per-channel lookup + screen blend, mixed with the source.
 *
 * One work-item produces one destination pixel at
 * (get_global_id(0), get_global_id(1)).
 *
 * Parameters:
 *   src_data  - source image, sampled through INPUT1 (ROI-remapped, linear).
 *   ovelay1   - lookup image, sampled through INPUT2. Each colour channel is
 *               looked up with the channel value as the x coordinate and a
 *               fixed y of .16666 / .5 / .83333 (presumably a three-row curve
 *               LUT -- confirm against the asset).
 *   dest_data - destination image.
 *   param     - filter parameters; used here by INPUT1 and by the
 *               get_global_id0/get_global_id1 helpers (defined outside this
 *               section).
 *   alpha     - effect strength in percent: 0 writes the source colour,
 *               100 writes the fully filtered colour. The value is not clamped.
 *
 * The written alpha channel is always the source sample's alpha.
 */
__kernel void MAIN(
      __read_only image2d_t src_data,
	  __read_only image2d_t ovelay1,
      __write_only image2d_t dest_data,
      __global FilterParam* param,
	  int alpha)
{
	const int W = get_global_size(0);
	const int H = get_global_size(1);

	// Effect tunables.
	// vignetteVal: x > 0 enables the vignette, y = darkening strength outside
	// the circle, z = brightening strength inside it.
	const vec3 vignetteVal = (vec3)(1.0f, 0.6f, 0.1f);
	// screenVal: xyz = colour screened over the result, w > 0 enables the step.
	const vec4 screenVal = (vec4)(0.1f, 0.1f, 0.0f, 1.0f);
	// 0.5 * ~sqrt(2): distance from the centre of the unit square to a corner.
	const float maxTexDist = 0.5f * 1.4142f;
	const float circleRadius = 0.45f;

	const int2 coordinate = (int2)(get_global_id(0), get_global_id(1));
	const vec2 iResolution = (vec2)(W, H);
	// Pixel-centre coordinate normalized by the global work size.
	const vec2 tc = ((float2)(get_global_id0( param), get_global_id1( param)) + (vec2)(0.5f)) / iResolution;
	const vec2 u_zoomRatio = (vec2)(1.2f, iResolution.y / iResolution.x);

	// Sample the source once: it is both the start of the effect chain and the
	// "unfiltered" side of the final mix.
	const vec4 inColor = INPUT1(src_data, param, tc);
	vec4 tuneColor = inColor;

	// 1st: add vignette
	if (vignetteVal.x > 0.0f)
	{
		const vec2 zoomRatio = 1.0f / u_zoomRatio;
		const vec2 startLocTex = (1.0f - zoomRatio) / 2.0f;
		const vec2 uniformTexCoord = (tc - startLocTex) / zoomRatio;

		const float d = distance(uniformTexCoord, (vec2)(0.5f, 0.5f));
		if (d >= circleRadius)
		{
			// Outside the circle: darken smoothly towards the corners.
			const float interpolateVal = smoothstep(circleRadius, maxTexDist, d);
			const float alpha1 = 1.0f - interpolateVal * vignetteVal.y;

			tuneColor = tuneColor * alpha1;
		}
		else
		{
			// Inside the circle: add white, strongest at the centre and fading
			// to nothing at the circle edge.
			const float interpolateVal = smoothstep(0.0f, circleRadius, d);
			const float alpha1 = (1.0f - interpolateVal) * vignetteVal.z;

			// The sum may exceed 1.0, so clamp it.
			tuneColor = min(tuneColor + alpha1, (vec4)(1.0f));
		}
	}

	// 2nd: colour curve mapping through the lookup image, one row per channel
	tuneColor.x = INPUT2(ovelay1,  param,  (vec2)(tuneColor.x, .16666f)).x;
	tuneColor.y = INPUT2(ovelay1,  param,  (vec2)(tuneColor.y, .5f)).y;
	tuneColor.z = INPUT2(ovelay1,  param,  (vec2)(tuneColor.z, .83333f)).z;

	// 3rd: screen blend with the constant colour (its alpha is left at 0 so
	// the w channel passes through unchanged)
	if (screenVal.w > 0.0f)
	{
		const vec4 mergeVal = (vec4)(screenVal.xyz, 0.0f);
		tuneColor = 1.0f - (1.0f - tuneColor) * (1.0f - mergeVal);
	}

	// Mix source and filtered colour by alpha percent. Both were swizzled to
	// .zyxw when sampled, so .zyx restores the storage channel order. Only the
	// colour channels of tuneColor are consumed; the output alpha is taken
	// from the source sample.
	const float alphaF = (float)alpha;
	write_imagef(dest_data, coordinate, (float4)(inColor.zyx*(1.0f - alphaF/100.0f) + tuneColor.zyx*alphaF/100.0f, inColor.w) );
}
